package data_deepprocessing.algorithm.cvalue_ncvalue_tfidf.tf_idf;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import uk.ac.shef.dcs.oak.jate.JATEException;
import uk.ac.shef.dcs.oak.jate.model.Term;

/**
 * Scores candidate terms with TF-IDF, using the statistics exposed by a
 * {@link TfIdfService}.
 *
 * <p>For every term {@code t} in the service's term list:
 * <pre>
 *   tf(t)    = termFreq(t) / (totalTermFreq + 1)
 *   idf(t)   = ln(totalDocs / docFreq(t))      (docFreq of 0 or missing is treated as 1)
 *   score(t) = tf(t) * idf(t)
 * </pre>
 *
 * @author YUHU YUAN
 * @date Created: 2017-03-12 09:39:11
 * @version 1.0
 */

public class TfidfAlgorithm {

	/**
	 * Service held by this instance. It is only exposed through the accessor
	 * pair below; {@link #execute(TfIdfService)} works on its own parameter and
	 * does not read this field.
	 */
	private TfIdfService tfIdfService;

	/**
	 * Computes a TF-IDF score for every term in {@code tfIdfService.getTermList()}.
	 *
	 * <p>A term that has no entry in the term-frequency map gets a term frequency
	 * of 0 (and therefore a score of 0). A term whose document count is 0 or
	 * absent is treated as occurring in one document, so the IDF division never
	 * divides by zero.
	 *
	 * @param tfIdfService source of the term list, per-term frequencies,
	 *                     per-term document counts and corpus totals; must not
	 *                     be {@code null}
	 * @return the scored terms, collected through a {@link Set} (so terms that
	 *         compare equal are kept once) and then ordered by
	 *         {@link Arrays#sort(Object[])}, i.e. by {@code Term}'s natural ordering
	 * @throws JATEException declared for callers; not thrown by this implementation
	 */
	public Term[] execute(TfIdfService tfIdfService) throws JATEException {
		double totalDocs = (double) tfIdfService.getTotalDocs();
		// Loop-invariant. The +1 keeps the denominator non-zero when the total
		// term frequency is 0.
		double tfDenominator = (double) tfIdfService.getTotalTermFreq() + 1.0;

		Set<Term> result = new HashSet<Term>();
		for (String s : tfIdfService.getTermList()) {
			double tf = toDouble(tfIdfService.getTermFreMap().get(s)) / tfDenominator;

			// Single lookup; a missing entry is handled like a zero count.
			double docFreq = toDouble(tfIdfService.getTermInDocCountMap().get(s));
			if (docFreq == 0) {
				docFreq = 1;
			}

			double idf = Math.log(totalDocs / docFreq);
			result.add(new Term(s, tf * idf));
		}

		Term[] all = result.toArray(new Term[0]);
		Arrays.sort(all);
		return all;
	}

	/**
	 * Converts a possibly-missing numeric count to a {@code double}.
	 *
	 * @param count a boxed number as returned by a map lookup, or {@code null}
	 *              when the key was absent
	 * @return the numeric value, or {@code 0.0} for {@code null}
	 */
	private static double toDouble(Object count) {
		return count == null ? 0.0 : ((Number) count).doubleValue();
	}

	/**
	 * @return the service stored on this instance, or {@code null} if none was set
	 */
	public TfIdfService getTfIdfService() {
		return tfIdfService;
	}

	/**
	 * Stores a service on this instance.
	 *
	 * @param tfIdfService the service to store
	 */
	public void setTfIdfService(TfIdfService tfIdfService) {
		this.tfIdfService = tfIdfService;
	}

}
















